The goal of the current analysis is to identify:
rnaseq_reads <- fread("~/GitHub/RNAseq_amelie/data/salmon_quant/Salmon_EstCount_ENSG.tsv")
ensembl <- useEnsembl(biomart="genes", dataset="hsapiens_gene_ensembl", mirror="asia")
bm <- getBM( attributes = c("external_gene_name","ensembl_gene_id"),
filters = "ensembl_gene_id",
values = rnaseq_reads$ENSG,
mart = ensembl)
bm <- as.data.table(bm)
rnaseq_reads <- merge.data.table(x = bm, y = rnaseq_reads,
by.x = "ensembl_gene_id", by.y = "ENSG",
all.y = TRUE)
rnaseq_reads[, ensembl_gene_id := NULL]
setnames(rnaseq_reads, "external_gene_name", "ENSG")
rnaseq_reads
\(\text{Weighted Recovery Ratio} = \left(1 - \frac{\text{Treatment} - \text{Ctrl}}{Stress - \text{Ctrl}}\right) \cdot \left|Stress - \text{Ctrl\right|\)
Rationale
To quantify the extent to which a treatment (PTS or PNS) restores gene expression altered by fatty acid (FA) stress back to the healthy baseline (Ctrl). This metric accounts for both the proportional recovery (directional reversion toward Ctrl) and the biological significance of the fatty acid stress-induced changes (magnitude of the FA-Ctrl shift).
Components
Proportional Recovery (Term between parenthesis): This term captures the relative shift of gene expression under treatment compared to the shift caused by H₂O₂. A value closer to 1 indicates near-complete recovery to Ctrl, while values < 0 indicate expression moving further away from Ctrl.
Biological Magnitude Weighting | Absolute term |: This term weights the proportional recovery by the magnitude of the expression change induced by H₂O₂. This means that genes with larger H₂O₂-induced shifts are given more importance.
Ranges and Interpretation
If you consider x as the Weighted Recovery Ratio of any given gene:
x < 0 : The treatment pushes expression levels further away from Ctrl in the same direction as the stress effect. Meaning the treatment exacerbated the stress-induced dysregulation. This is not necessarily bad, since a “dysregulation” mechanism could help restore homeostasis.
x ∼ 0: Minimal recovery or no effect of the treatment.
0 < x < 1: Partial recovery, the treatment was moderately effective in reversing oxidative stress-induced dysregulation.
x ≥ 1: Complete or “over-recovery”, the treatment effectively restores expression to healthy levels, potentially also overshooting healthy baseline.
We first calculate average VST values for all genes within each experimental condition.
rnaseq_vst <- data.table(ENSG = rnaseq_reads$ENSG,
DESeq2:::vst(as.matrix(ceiling(rnaseq_reads[,
.SD,
.SDcols = !"ENSG"]))))
## converting counts to integer mode
rnaseq_vst <- melt(rnaseq_vst, id.vars = "ENSG", variable.name = "Condition", value.name = "VST")
rnaseq_vst[, Condition := gsub("[1-3]", "", Condition)]
rnaseq_vst[, VST := mean(VST), by = c("ENSG", "Condition")]
rnaseq_vst <- dcast(unique(rnaseq_vst), formula = ENSG ~ Condition, value.var = "VST")
rnaseq_vst
Next, we calculate the recovery ratios for each experimental condition
rnaseq_rank <- rnaseq_vst
rnaseq_rank[, AbsoluteEffect := FA - CTRL]
rnaseq_rank[, PTS_Recovery := round((1 - ((PTS - CTRL)/(AbsoluteEffect))) * abs(AbsoluteEffect), 5)]
rnaseq_rank[, PNS_Recovery := round((1 - ((PNS - CTRL)/(AbsoluteEffect))) * abs(AbsoluteEffect), 5)]
rnaseq_rank[, PTS_Recovery := ifelse(AbsoluteEffect == 0, 0, PTS_Recovery)]
rnaseq_rank[, PNS_Recovery := ifelse(AbsoluteEffect == 0, 0, PNS_Recovery)]
rnaseq_rank <- rnaseq_rank[, .SD, .SDcols = c("ENSG", "PTS_Recovery", "PNS_Recovery")]
setorder(rnaseq_rank, PTS_Recovery)
rnaseq_rank
apply(rnaseq_rank[, .SD, .SDcols = -"ENSG"], MARGIN = 2, summary)
## PTS_Recovery PNS_Recovery
## Min. -0.924840000 -1.10422000
## 1st Qu. -0.016705000 0.00000000
## Median 0.000000000 0.00000000
## Mean 0.007973473 0.03113815
## 3rd Qu. 0.045287500 0.07102250
## Max. 2.493930000 2.41983000
recovery_dt <- melt(rnaseq_rank, id.vars = "ENSG", variable.name = "Condition", value.name = "WRR")
ggplot(recovery_dt, aes(x = WRR, fill = Condition)) +
geom_histogram(binwidth = 0.2) +
geom_vline(xintercept = 0, colour = "black", linetype = "dashed") +
scale_y_continuous(trans = "log1p", labels = comma, breaks = c(1, 10, 1000, 5000, 20000, 30000))+
facet_wrap(~Condition)
The histograms represent the recovery rates calculated on VST counts. Most genes showed a moderate to large recovery centered at 0. Based on the summary statistics and histograms, seems like PNS had the highest recovery while GEN had the strongest effect overall.
rnaseq_rank[, PTS_Classification := ifelse(PTS_Recovery < -0.5, "EnhancedResponse", "NoResponse")]
rnaseq_rank[, PTS_Classification := ifelse(PTS_Recovery > 0.5, "RecoveryResponse", PTS_Classification)]
rnaseq_rank[, PNS_Classification := ifelse(PNS_Recovery < -0.5, "EnhancedResponse", "NoResponse")]
rnaseq_rank[, PNS_Classification := ifelse(PNS_Recovery > 0.5, "RecoveryResponse", PNS_Classification)]
setcolorder(rnaseq_rank, c("ENSG", "PTS_Recovery", "PTS_Classification"))
rnaseq_rank
dt_plot <- rnaseq_rank[, .(ENSG, PTS_Classification, PNS_Classification)]
dt_plot <- melt(dt_plot, id.vars = "ENSG")
dt_plot[, variable := stringr:::str_remove(variable, "_Classification")]
ggplot(dt_plot, aes(x = value, fill = variable)) +
geom_bar(position=position_dodge()) +
facet_wrap( ~ variable) +
scale_y_continuous(trans = "log10") +
coord_flip() +
labs(x = "", y = "Number of genes (log10)",
fill = "Treatment")
upset_dt <- dt_plot[, .(ENSG, Class = paste(variable, value, sep = "_"))]
upset_dt <- dcast(upset_dt, ENSG ~ Class, fun.aggregate = length)
upset_mat <- as.matrix(upset_dt[, !"ENSG"])
rownames(upset_mat) <- upset_dt$ENSG
upset_mat <- make_comb_mat(upset_mat)
upset_mat
## A combination matrix with 6 sets and 7 combinations.
## ranges of combination set size: c(19, 28960).
## mode for the combination size: distinct.
## sets are on rows.
##
## Combination sets are:
## PNS_EnhancedResponse PNS_NoResponse PNS_RecoveryResponse PTS_EnhancedResponse PTS_NoResponse PTS_RecoveryResponse code size
## x x 100100 19
## x x 100010 19
## x x 010100 29
## x x 010010 28960
## x x 010001 33
## x x 001010 127
## x x 001001 57
##
## Sets are:
## set size
## PNS_EnhancedResponse 38
## PNS_NoResponse 29022
## PNS_RecoveryResponse 184
## PTS_EnhancedResponse 48
## PTS_NoResponse 29106
## PTS_RecoveryResponse 90
UpSet(upset_mat)
# PNS Recovery & PTS NoResponse
extract_comb(upset_mat, "001010")
## [1] "ADM2" "AKT3" "ALDH1A3" "ALDH1L2"
## [5] "ALDH3A2" "ANKRD1" "ASPH" "ATAT1"
## [9] "BLOC1S5-TXNDC5" "BOLA2B" "C1QTNF6" "C5orf34"
## [13] "CASTOR2" "CD24" "CDC42EP1" "CDC42EP2"
## [17] "CDKN1C" "CHAC1" "CLN8" "CNTNAP3C"
## [21] "CORO7" "CSF3" "CSGALNACT1" "CYP1A1"
## [25] "CYP4F11" "DAXX" "EPB41L4A" "EPHX1"
## [29] "ERRFI1" "ETNK2" "F3" "FADS1"
## [33] "FAM220A" "FAM83H" "FAP" "FLOT1"
## [37] "FOXO1" "FOXQ1" "FTH1" "FTL"
## [41] "GFUS" "GOLGA8B" "HES1" "HMOX1"
## [45] "HRK" "INTS3" "ITGB2" "ITGB4"
## [49] "KNTC1" "LTBP1" "LTBP4" "MAPK4"
## [53] "MARF1" "MARVELD1" "MCAM" "MEGF6"
## [57] "MELTF" "MLPH" "MME" "MT-ATP6"
## [61] "MT-ATP8" "MT-CYB" "MT-ND1" "MT-ND4"
## [65] "MT-ND4L" "MT-ND5" "MT-ND6" "MT1X"
## [69] "MTATP6P1" "MYC" "MYZAP" "NAV3"
## [73] "NDRG1" "NIBAN1" "NLRP1" "NOXA1"
## [77] "NPIPA1" "NPIPA7" "OPLAH" "OSGIN1"
## [81] "PAN2" "PCDH1" "PCLO" "PEDS1-UBE2V1"
## [85] "PIR" "PLCH2" "PPIF" "PPIP5K1P1"
## [89] "PPP1R11" "PPP2R3B" "PRDX1" "PRKACA"
## [93] "PRR5L" "PRSS23" "PSMB3" "PSMC1P1"
## [97] "RAB7B" "RAC1P2" "RGS2" "RNF39"
## [101] "RPL23AP47" "RPS9" "SCNN1A" "SCNN1G"
## [105] "SERPINB2" "SH3TC1" "SIK1" "SLC1A4"
## [109] "SLC6A9" "SQSTM1" "SRGAP3" "STARD10"
## [113] "SYNRG" "TGFB2" "TGM2" "THBS1"
## [117] "TIMP3" "TIPARP" "TKT" "TNS4"
## [121] "TRIM39" "TRMT6" "TXNRD1" "UNC5B"
## [125] "VGLL3" "VSTM2L" "ZBED6"
# PNS Enhanced & PTS NoResponse
extract_comb(upset_mat, "100010")
## [1] "AKAP12" "CHTF18" "DBP" "DCAF11"
## [5] "EME1" "F2R" "FBXO32" "GBE1"
## [9] "GPCPD1" "IFNAR2-IL10RB" "IFNGR2" "KIF21B"
## [13] "KLF10" "MAP1B" "MT-ND2" "RGPD5"
## [17] "RUFY1" "TMEM265" "VPS52"
# PTS Recovery & PNS NoResponse
extract_comb(upset_mat, "010001")
## [1] "ABCC2" "ANGPTL4" "AOX1" "ARMCX5-GPRASP2"
## [5] "BHLHE40" "CCNA1" "DCAF15" "DDIT4"
## [9] "DST" "IMPA2" "ITGB8" "KANSL1"
## [13] "KPNA2" "LAMA3" "NDUFA6" "NOTCH3"
## [17] "NSF" "NT5E" "PDK4" "PFDN6"
## [21] "PKN1" "POLR1H" "PPP1R10" "RPL13AP7"
## [25] "RPP21" "RXRB" "SYNJ2BP-COX16" "TBC1D3C"
## [29] "TRIM56" "TRIM61" "TSC22D1" "UBALD2"
## [33] "ZBTB22"
# PTS Enhanced & PNS NoResponse
extract_comb(upset_mat, "010100")
## [1] "ARL2-SNX15" "ATF6B" "C1S" "CDCA2" "CDK1"
## [6] "CELSR2" "CFB" "CHI3L1" "CXCL2" "DDIT3"
## [11] "DLL1" "DTL" "FAM156B" "GBP2" "GDF15"
## [16] "GFPT2" "H2AC19" "HERPUD1" "MFSD2A" "PIGW"
## [21] "PKD1P3" "S100A14" "SAA2" "SERPINB4" "SYNE2"
## [26] "TMT1A" "TP63" "TPX2" "ZC3H12A"
degs <- fread("~/GitHub/RNAseq_amelie/output/DEG_results.tsv")
degs <- degs[, .(external_gene_name, log2FoldChange, padj, Group)]
degs <- unique(degs[external_gene_name != ""])
# Add Categories of interest
degs[, WRR_category := "None"]
# PNS Recovery & PTS NoResponse
degs[external_gene_name %in% extract_comb(upset_mat, "001010"),
WRR_category := "PNS_Recovery"]
# PTS Recovery & PTS NoResponse
degs[external_gene_name %in% extract_comb(upset_mat, "001010"),
WRR_category := "PTS_Recovery"]
# PNS Enhanced & PTS NoResponse
degs[external_gene_name %in% extract_comb(upset_mat, "100010"),
WRR_category := "PNS_Enhanced"]
# PTS Enhanced & PTS NoResponse
degs[external_gene_name %in% extract_comb(upset_mat, "010100"),
WRR_category := "PTS_Enhanced"]
degs
temp <- degs[padj < alpha & WRR_category != "None" & Group == "PNS_vs_FA",
.(external_gene_name, log2FoldChange, WRR_category)]
setorder(temp, "log2FoldChange")
temp
temp <- degs[padj < alpha & WRR_category != "None" & Group == "PTS_vs_FA",
.(external_gene_name, log2FoldChange, WRR_category)]
setorder(temp, "log2FoldChange")
temp